<?php
!defined('DEBUG') AND exit('Access Denied.');
require_once APP_PATH . 'plugin/ghx_parsedoc/inc/domfuns.php';
// HanLP keyword-extraction endpoint and the token sent with each request to it.
// NOTE(review): the endpoint is plain HTTP and the token is committed in source;
// consider HTTPS and loading the credential from configuration.
const KEYWORD_URL = 'http://comdo.hanlp.com/hanlp/v1/keyword/extract';
const KEYWORD_TOKEN = '0470595d064f41a7837ce5a7050f94f81605717184238token';
// HanLP phrase-extraction endpoint and its token (same caveats as above).
const PHRASE_URL = 'http://comdo.hanlp.com/hanlp/v1/phrase/extract';
const PHRASE_TOKEN = '8b5279c1b3bd4c73853c3cd6ff135dd71605717351713token';
// Maximum number of ranked keys written to the result output.
const GET_KEY_COUNT = 256;



// Result holders. $err_message accumulates the ranked "key<TAB>weight" lines
// produced further down; $down_file_name is not used again in this file
// (presumably read by the included template -- verify).
$down_file_name = '';
$err_message = '';

// Request parameters.
$oldpath = param('oldpath');
$texter = param('texter');
$fd_name = param('fdname');
$question_count = param('questioncount', 0); // requested number of questions

// Set to true only after the operator password has been verified.
$validpwd = false;

// Feature switches for the two remote extractors.
$keyword_allow = false; // call the keyword-extraction API?
$phrase_allow = true;   // call the phrase-extraction API?

// Words that are never counted.
$blacklist = ['<br/>', '答', '在', '但是'];

// Weight credited to the 1st, 2nd, ... newly seen phrase returned for a line.
$phrase_weight = [
    1.0, 0.9, 0.8, 0.7,
    0.7, 0.7, 0.6, 0.6,
];

// Weight credited to the 1st, 2nd, ... newly seen keyword returned for a line.
$keyword_weight = [
    0.8, 0.75, 0.7, 0.65,
    0.65, 0.65, 0.6, 0.6,
];


/*
 * Keyword-ranking request handler.
 *
 * When the request carries the correct `optpwd` and either pasted text
 * ($texter) or a file path ($oldpath), the text is appended to the target
 * file, the file is read line by line, each line longer than 5 characters is
 * sent to the enabled extraction services, and the weighted hits are summed
 * per word. The top GET_KEY_COUNT entries are appended to $err_message as
 * "word<TAB>weight" lines.
 *
 * Reads:  $texter, $oldpath, $blacklist, $phrase_weight, $keyword_weight,
 *         $phrase_allow, $keyword_allow.
 * Writes: $validpwd, $oldpath (rewritten to the full path), $allKeys,
 *         $err_message.
 */
if (array_key_exists('optpwd', $_REQUEST)) {
    // Type-checked, constant-time comparison: a loose `==` would coerce
    // non-string input and leaks timing information.
    // NOTE(review): the password is still hard-coded; move it to configuration.
    if (is_string($_REQUEST['optpwd']) && hash_equals('HmGd19761125!', $_REQUEST['optpwd'])) {
        $validpwd = true;
    }

    if ($validpwd && (!empty($texter) || !empty($oldpath))) {
        $path_ok = true;
        if (!empty($oldpath)) {
            // The path comes straight from the request: refuse NUL bytes and
            // ".." segments so it cannot escape the application directory.
            $path_ok = strpos($oldpath, "\0") === false
                && !preg_match('#(?:^|[\\\\/])\.\.(?:[\\\\/]|$)#', $oldpath);

            if (stripos($oldpath, 'upload/') === false && stripos($oldpath, 'upload\\') === false) {
                $oldpath = 'upload/' . $oldpath;
            }
            $oldpath = APP_PATH . $oldpath;
        } else {
            $oldpath = APP_PATH . 'upload/textkey_' . date('YmdHis') . '.txt';
        }

        ini_set('max_execution_time', '2700');    // 45 minutes
        ini_set('memory_limit', '1048576000');    // bytes, roughly 1 GB

        // "a+" creates the file when missing and appends any pasted text.
        $f = $path_ok ? fopen($oldpath, 'a+') : false;

        if (!$path_ok) {
            $err_message .= 'Invalid file path.' . PHP_EOL;
        } elseif ($f === false) {
            // Without this check the read loop below would never terminate
            // (PHP 7) or would throw a TypeError (PHP 8).
            $err_message .= 'Unable to open the text file.' . PHP_EOL;
        } else {
            if (!empty($texter)) {
                fwrite($f, $texter);
            }
            fseek($f, 0, SEEK_SET);

            // Enabled extractors, in the order they are consulted:
            // array(url, token, requested result size, rank weights).
            $extractors = array();
            if ($phrase_allow) {
                $extractors[] = array(PHRASE_URL, PHRASE_TOKEN, 5, $phrase_weight);
            }
            if ($keyword_allow) {
                $extractors[] = array(KEYWORD_URL, KEYWORD_TOKEN, 3, $keyword_weight);
            }

            $allKeys = array();
            while (($raw_line = fgets($f)) !== false) {
                $line = trim($raw_line);
                // Explicit comparison: empty() would also discard the line "0".
                if ($line === '') {
                    continue;
                }

                $retw = array();
                if (xn_strlen($line) > 5) {
                    foreach ($extractors as $extractor) {
                        list($api_url, $api_token, $api_size, $weights) = $extractor;

                        $response = do_http_post(
                            $api_url,
                            array('text' => $line, 'size' => $api_size),
                            '',
                            'token:' . $api_token . "\r\n"
                        );
                        // Decode afresh for every call so a failed request can
                        // never reuse the previous line's result.
                        $decoded = !empty($response) ? xn_json_decode($response) : null;
                        if (!is_array($decoded)
                            || !isset($decoded['code'], $decoded['data'])
                            || (string)$decoded['code'] !== '0'
                            || !is_array($decoded['data'])
                        ) {
                            continue;
                        }

                        $wz = 0;
                        $last_weight = count($weights) - 1;
                        foreach ($decoded['data'] as $item) {
                            if (!is_array($item) || !isset($item['word']) || !is_scalar($item['word'])) {
                                continue;
                            }
                            $word = (string)$item['word'];
                            if (!array_key_exists($word, $retw)) {
                                // Clamp to the last weight if the service
                                // returns more words than the table covers.
                                $retw[$word] = $last_weight >= 0 ? $weights[min($wz, $last_weight)] : 0;
                                $wz++;
                            }
                        }
                    }
                } else {
                    // Short lines count as a keyword themselves.
                    $retw[$line] = 1;
                }

                foreach ($retw as $tk => $tw) {
                    // Numeric-looking words become integer array keys; cast
                    // back and compare strictly so they cannot loosely match
                    // a blacklist entry.
                    if (in_array((string)$tk, $blacklist, true)) {
                        continue;
                    }
                    $allKeys[$tk] = (isset($allKeys[$tk]) ? $allKeys[$tk] : 0) + $tw;
                }
            }
            fclose($f);

            // Highest accumulated weight first; keep keys (they are the words).
            arsort($allKeys, SORT_NUMERIC);
            foreach (array_slice($allKeys, 0, GET_KEY_COUNT, true) as $k => $v) {
                $err_message .= $k . "\t" . $v . PHP_EOL;
            }
        }
    }
}


// Page metadata placed in $header before the template is included.
$header['title'] ='北京慧明国鼎科技有限公司';
$header['description'] = '深度防御，正义军工让弱势民族拨云见日！我们带领AI为人类服务走向智慧生活,构建生态文明。';
// Include the plugin's page template via the path returned by _include()
// (defined elsewhere; presumably resolves/compiles the template -- verify).
include _include(APP_PATH.'plugin/ghx_parsedoc/htm/textag.htm');